# Packages used throughout this walkthrough.
library(cleaningtools)
library(dplyr)

# Example inputs shipped with cleaningtools: the raw dataset and the two
# Kobo tool sheets (survey and choices).
my_raw_dataset <- cleaningtools::cleaningtools_raw_data
my_kobo_survey <- cleaningtools::cleaningtools_survey
my_kobo_choice <- cleaningtools::cleaningtools_choices

# The filled cleaning log is read from the second sheet of the workbook.
my_filled_log <- readxl::read_excel(
  "../inputs/02 - example - cleaning-log-with-kobo - filled.xlsx",
  sheet = 2
)

# Apply the filled cleaning log to the raw data. The column-name arguments
# tell the function where to find the uuid, question, new value and change
# type in the log.
my_clean_data <- create_clean_data(
  raw_dataset = my_raw_dataset,
  raw_data_uuid_column = "X_uuid",
  cleaning_log = my_filled_log,
  cleaning_log_uuid_column = "uuid",
  cleaning_log_question_column = "question",
  cleaning_log_new_value_column = "new_value",
  cleaning_log_change_type_column = "change_type"
)

# Recreate the parent columns using "." as the select-multiple separator.
# The result is a list; later chunks read its `data_with_fix_concat` and
# `cleaning_log` elements.
my_clean_data2 <- recreate_parent_column(
  dataset = my_clean_data,
  uuid_column = "X_uuid",
  kobo_survey = my_kobo_survey,
  kobo_choices = my_kobo_choice,
  sm_separator = ".",
  cleaning_log_to_append = my_filled_log
)
review_others
En el registro de limpieza, algunos valores de texto abierto se cambian a vacío. Algunas preguntas de texto abierto están vinculadas a una lógica condicional, por ejemplo: «¿Qué es X?» seguida de «Otro, por favor especifique». En algunos casos, también deben cambiarse otros valores relacionados. En el siguiente ejemplo, se cambió a vacío el valor de water_supply_other_neighbourhoods_why de la encuesta con uuid 019bc718-c06a-46b8-bba8-c84f6c6efbd5.
# Show the cleaning-log entries that blank out the free-text answer to
# water_supply_other_neighbourhoods_why.
filter(
  my_filled_log,
  question == "water_supply_other_neighbourhoods_why" &
    change_type == "blank_response"
)
019bc718-c06a-46b8-bba8-c84f6c6efbd5
لا اعلم
water_supply_other_neighbourhoods_why
recode other
NA
water_supply_other_neighbourhoods_why / 019bc718-c06a-46b8-bba8-c84f6c6efbd5
blank_response
NA
12
En la herramienta Kobo se puede observar una lógica condicional basada en la variable water_supply_other_neighbourhoods.
# Show the type and the `relevant` expression stored in the Kobo survey
# sheet for the follow-up question.
my_kobo_survey %>%
  filter(name == "water_supply_other_neighbourhoods_why") %>%
  select(type, name, relevant)
text
water_supply_other_neighbourhoods_why
selected(${water_supply_other_neighbourhoods},'somewhat_worse') or selected(${water_supply_other_neighbourhoods},'much_worse')
# For the survey discussed above, show the parent answer next to its
# free-text follow-up in the cleaned data.
select(
  filter(my_clean_data, X_uuid == "019bc718-c06a-46b8-bba8-c84f6c6efbd5"),
  water_supply_other_neighbourhoods,
  water_supply_other_neighbourhoods_why
)
¿Debe cambiarse también el valor de water_supply_other_neighbourhoods? Depende de la pregunta y de la lógica condicional, pero es importante señalar estos casos para que se pueda tomar una decisión.
# Run review_others() on the dataset with recreated parent columns, using
# the Kobo survey sheet; consent_telephone_number is excluded from the check.
# NOTE(review): presumably the returned log flags "other" text answers that
# are inconsistent with their parent question - confirm against package docs.
review_other_log <- review_others(
  dataset = my_clean_data2$data_with_fix_concat,
  uuid_column = "X_uuid",
  kobo_survey = my_kobo_survey,
  columns_not_to_check = "consent_telephone_number"
)
Warning in create_logic_for_other(kobo_survey = kobo_survey,
compare_with_dataset = TRUE, : The following parent names: well_quality,
spring_quality, rainwater_quality, surface_quality, why_not_connected were not
found in the dataset. The function is ignoring them.
review_cleaning
# Entries of the appended cleaning log that remove a whole survey.
my_deletion_log <- filter(
  my_clean_data2$cleaning_log,
  change_type == "remove_survey"
)

# Everything else in the log, also dropping any entry that belongs to a
# survey listed in the deletion log.
my_filled_log_no_deletion <- filter(
  my_clean_data2$cleaning_log,
  change_type != "remove_survey",
  !uuid %in% my_deletion_log$uuid
)

# Review the cleaning using the raw data, the clean data, the cleaning log
# without deletions, and the deletion log.
review_of_cleaning <- review_cleaning(
  raw_dataset = my_raw_dataset,
  raw_dataset_uuid_column = "X_uuid",
  clean_dataset = my_clean_data2$data_with_fix_concat,
  clean_dataset_uuid_column = "X_uuid",
  cleaning_log = my_filled_log_no_deletion,
  cleaning_log_uuid_column = "uuid",
  cleaning_log_question_column = "question",
  cleaning_log_new_value_column = "new_value",
  cleaning_log_change_type_column = "change_type",
  cleaning_log_old_value_column = "old_value",
  deletion_log = my_deletion_log,
  deletion_log_uuid_column = "uuid"
)